# Reproducible synthetic data: three uniform blobs plus uniform background noise.
# The draws happen in the same order as listed (all of f1 first, then f2).
set.seed(505)
f1 <- unlist(Map(
  runif,
  c(150, 150, 150, 200),
  c(-20, 15, 50, -20),
  c(30, 65, 100, 100)
))
f2 <- unlist(Map(
  runif,
  c(150, 150, 150, 200),
  c(-10, 40, -30, -35),
  c(30, 80, 10, 100)
))
df <- data.frame(x = f1, y = f2)
# Scatter plot of the raw, unclustered points
plot(f1, f2, main = "orginal", col = "darkgrey", pch = 19, cex = 0.5)
# How many clusters to look for
k <- 3
# One plotting colour per cluster
colors <- RColorBrewer::brewer.pal(name = "Paired", n = k)
# Pick k distinct rows of the data at random; they act as the starting centroids
index_centriods <- sample(seq_len(nrow(df)), size = k)
intial_Centroids <- df[index_centriods, ]
# Show the data with the starting centroids overlaid as red diamonds
plot(
  f1, f2,
  main = "Intial Centroids ",
  xlab = "x",
  ylab = "y",
  pch = 19,
  cex = 0.5,
  col = "darkgrey"
)
points(
  intial_Centroids$x, intial_Centroids$y,
  col = "red",
  pch = 23,
  cex = 1.5
)
# Assign every point to its nearest centroid and plot the result.
#
# Arguments:
#   df        - data frame with numeric columns `x` and `y`; any other columns
#               are carried along untouched
#   centroids - data frame with columns `x` and `y`; rows 1..k are the centroids
#   k         - number of clusters (>= 1)
#   iter      - label of the current pass, shown in the plot title
#   colors    - vector of colours, indexed by cluster number
#
# Returns `df` with these columns added (or overwritten if already present):
#   Distance_from_<i> - Euclidean distance of each point to centroid i
#   closest           - index of the nearest centroid (first one wins on ties)
#   color             - colors[closest]
assignment <- function(df, centroids, k, iter, colors) {
  stopifnot(length(k) == 1L, k >= 1)

  cluster_ids <- seq_len(k)
  dist_cols <- paste0("Distance_from_", cluster_ids)

  # Euclidean distance = sqrt((x1 - x2)^2 + (y1 - y2)^2)
  for (i in cluster_ids) {
    df[[dist_cols[i]]] <- sqrt(
      (df$x - centroids$x[i])^2 + (df$y - centroids$y[i])^2
    )
  }

  # Look the distance columns up by name, not by position: `df` may carry extra
  # columns, or `closest`/`color` left over from a previous pass.
  dist_mat <- as.matrix(df[dist_cols])
  df$closest <- vapply(
    seq_len(nrow(dist_mat)),
    function(r) {
      nearest <- which.min(dist_mat[r, ])
      # which.min() yields integer(0) when every distance in the row is missing
      if (length(nearest) == 0L) NA_integer_ else unname(nearest)
    },
    integer(1)
  )

  # Colour each point by the cluster it was assigned to
  df$color <- colors[df$closest]

  plot(df$x, df$y, col = df$color, cex = 0.5, pch = 19, ylab = 'y', xlab = 'x',
       main = paste('Clustering:', iter))
  points(x = centroids$x, y = centroids$y, cex = 1.5, pch = 23, col = 'red')

  df
}
# First pass: start from the randomly drawn centroids and label every point
Centroids <- intial_Centroids
df <- assignment(df, centroids = Centroids, k = k, iter = 1, colors = colors)
# Peek at the first rows of the labelled data
head(df, n = 6L)
## x y Distance_from_1 Distance_from_2 Distance_from_3
## 1 -13.444819 4.463472 94.67149 82.79937 87.28534
## 2 5.197805 26.989491 71.67655 77.32320 58.64648
## 3 -15.004434 14.828633 84.63812 88.08710 82.15963
## 4 -15.463153 17.912041 81.67425 89.83152 80.83027
## 5 -18.964604 7.478121 92.57031 89.00012 89.67948
## 6 19.279155 -5.429700 105.82957 48.64243 74.82356
## closest color
## 1 2 #1F78B4
## 2 3 #B2DF8A
## 3 3 #B2DF8A
## 4 3 #B2DF8A
## 5 2 #1F78B4
## 6 2 #1F78B4
# plotting the first clustering result
# The new centroids are the means of each cluster.
# Move each centroid to the mean of the points currently assigned to it and
# plot the result.
#
# Arguments:
#   centroids - data frame with columns `x` and `y`; rows 1..k are the centroids
#   k         - number of clusters
#   df        - data frame with columns `x`, `y`, `closest` (cluster index per
#               point) and `color` (used for plotting)
#
# Returns `centroids` with rows 1..k updated.
#
# NOTE: the name shadows stats::update() for code that runs after this
# definition; it is kept because the rest of the script calls it by this name.
update <- function(centroids, k, df) {
  for (i in seq_len(k)) {
    # which() also drops points whose cluster label is NA
    members <- which(df$closest == i)
    # A cluster with no points keeps its previous centroid; taking the mean of
    # nothing would give NaN and poison every later distance computation.
    if (length(members) > 0L) {
      centroids$x[i] <- mean(df$x[members])
      centroids$y[i] <- mean(df$y[members])
    }
  }

  plot(df$x, df$y, col = df$color, cex = 0.5, pch = 19, ylab = 'y', xlab = 'x',
       main = "New centroids ")
  points(x = centroids$x, y = centroids$y, cex = 1.5, pch = 23, col = 'red')

  centroids
}
# Move the centroids to the means of the clusters found in the first pass
Centroids <- update(Centroids, k, df)
### Cluster the data with new centroids
# Passes 2 to 5: re-assign every point to its nearest centroid, then move the
# centroids to the means of the freshly formed clusters
for (pass in 2:5) {
  df <- assignment(df, Centroids, k, pass, colors)
  Centroids <- update(Centroids, k, df)
}
# Now we get 3 clear clusters.
# Run k-means on a two-dimensional data set, plotting every step.
#
# Arguments:
#   df        - data frame with numeric columns `x` and `y`
#   k         - number of clusters (between 1 and nrow(df))
#   iteration - number of assignment passes to run
#
# Returns, invisibly, a list with
#   data      - `df` with the distance, `closest` and `color` columns of the
#               last assignment pass
#   centroids - the centroids that last pass was computed against
Kmean_2D <- function(df, k, iteration) {
  stopifnot(length(k) == 1L, k >= 1, k <= nrow(df))

  # plot the original dataset
  plot(df$x, df$y, cex = 0.5, pch = 19, main = "Orginal df")

  # one colour per cluster
  colors <- RColorBrewer::brewer.pal(n = k, name = 'Paired')

  # k distinct rows drawn at random serve as the initial centroids
  index_centriods <- sample(seq_len(nrow(df)), k)
  intial_Centroids <- df[index_centriods, ]

  # plot the initial centroids on top of the data
  plot(df$x, df$y, cex = 0.5, pch = 19, main = "Initial Centroids")
  points(x = intial_Centroids$x, y = intial_Centroids$y, cex = 1.5, pch = 23,
         col = 'red')

  Centroids <- intial_Centroids
  # seq_len() runs zero passes for iteration = 0 instead of counting 1, 0
  for (iter in seq_len(iteration)) {
    df <- assignment(df, Centroids, k, iter, colors)
    # The centroids are not moved after the final pass, so the returned
    # centroids are exactly the ones the returned labels refer to.
    if (iter != iteration) {
      Centroids <- update(Centroids, k, df)
    }
  }

  invisible(list(data = df, centroids = Centroids))
}
# Second synthetic data set: four uniform blobs plus uniform background noise.
# The draws happen in the same order as listed (all of f1 first, then f2).
set.seed(505)
f1 <- unlist(Map(
  runif,
  c(150, 150, 150, 150, 200),
  c(-20, 15, 50, 80, -20),
  c(30, 65, 100, 120, 100)
))
f2 <- unlist(Map(
  runif,
  c(150, 150, 150, 150, 200),
  c(-10, 40, -30, 30, -35),
  c(30, 80, 10, 80, 100)
))
df <- data.frame(x = f1, y = f2)
# Cluster the second data set into 4 groups using 6 assignment passes
Kmean_2D(df = df, k = 4, iteration = 6)